The decorrelation
# Run IDeA on `dataframe` with threshold `thro`; verbose = TRUE prints the
# per-iteration progress log.
DEdataframe <- IDeA(dataframe, thr = thro, verbose = TRUE)
#>
#> pregnant_x_pressure_x_age pregnant_x_pressure_x_insulin pregnant_x_pressure_x_pedigree insulin_x_mass_x_pedigree glucose_x_triceps_x_insulin pressure_x_insulin_x_age
#> pregnant glucose pressure triceps insulin mass
#> 0.83695652 0.03260870 0.01086957 0.13043478 0.54347826 0.04347826
#>
#> Included: 92 , Uni p: 0.001630435 , Base Size: 11 , Rcrit: 0.1482456
#>
#>
1 <R=0.986,thr=0.950>, Top: 15< 3 >[Fa= 15 ]( 15 , 39 , 0 ),<|><>Tot Used: 54 , Added: 39 , Zero Std: 0 , Max Cor: 0.965
#>
2 <R=0.965,thr=0.950>, Top: 2< 2 >[Fa= 17 ]( 2 , 4 , 15 ),<|><>Tot Used: 57 , Added: 4 , Zero Std: 0 , Max Cor: 0.950
#>
3 <R=0.950,thr=0.950>, Top: 1< 1 >[Fa= 18 ]( 1 , 1 , 17 ),<|><>Tot Used: 57 , Added: 1 , Zero Std: 0 , Max Cor: 0.949
#>
4 <R=0.949,thr=0.900>, Top: 22< 2 >[Fa= 29 ]( 22 , 32 , 18 ),<|><>Tot Used: 83 , Added: 32 , Zero Std: 0 , Max Cor: 0.949
#>
5 <R=0.949,thr=0.900>, Top: 5< 1 >[Fa= 34 ]( 5 , 5 , 29 ),<|><>Tot Used: 84 , Added: 5 , Zero Std: 0 , Max Cor: 0.900
#>
6 <R=0.900,thr=0.800>, Top: 20< 1 >[Fa= 40 ]( 16 , 24 , 34 ),<|><>Tot Used: 87 , Added: 24 , Zero Std: 0 , Max Cor: 0.858
#>
7 <R=0.858,thr=0.800>, Top: 2< 1 >[Fa= 41 ]( 2 , 2 , 40 ),<|><>Tot Used: 89 , Added: 2 , Zero Std: 0 , Max Cor: 0.851
#>
8 <R=0.851,thr=0.800>, Top: 2< 1 >[Fa= 41 ]( 1 , 1 , 41 ),<|><>Tot Used: 89 , Added: 1 , Zero Std: 0 , Max Cor: 0.797
#>
9 <R=0.797,thr=0.800>
#>
[ 9 ], 0.7969239 Decor Dimension: 89 Nused: 89 . Cor to Base: 62 , ABase: 92 , Outcome Base: 0
#>
# Every column name of the IDeA output except the outcome column.
varlistc <- colnames(DEdataframe)[!(colnames(DEdataframe) == outcome)]
# Sum of the per-column variances of the original features.
pander::pander(
  sum(apply(dataframe[, varlist], MARGIN = 2, FUN = var))
)
4.99e+12
# Sum of the per-column variances of the IDeA-transformed features.
pander::pander(
  sum(apply(DEdataframe[, varlistc], MARGIN = 2, FUN = var))
)
1.05e+12
# Entropy of all original feature values pooled together and binned into
# 256 bins.
pander::pander(
  entropy(
    discretize(unlist(dataframe[, varlist]), 256)
  )
)
1.13
# Entropy of all IDeA-transformed feature values pooled together and binned
# into 256 bins.
pander::pander(
  entropy(
    discretize(unlist(DEdataframe[, varlistc]), 256)
  )
)
0.918
The decorrelation matrix
# Heatmap of the non-zero pattern of the matrix stored in the "UPLTM"
# attribute of DEdataframe. Skipped entirely when `largeSet` is TRUE.
if (!largeSet) {
  par(cex = 0.6, cex.main = 0.85, cex.axis = 0.7)
  UPLTM <- attr(DEdataframe, "UPLTM")
  gplots::heatmap.2(
    # 1/0 indicator: 1 where the coefficient is non-zero.
    1.0 * (abs(UPLTM) > 0),
    trace = "none",
    # Full argument name; the previous `mar` only reached this formal
    # through partial argument matching.
    margins = c(5, 5),
    col = rev(heat.colors(5)),
    main = "Decorrelation matrix",
    cexRow = cexheat,
    cexCol = cexheat,
    srtCol = 45,
    srtRow = 45,
    key.title = NA,
    key.xlab = "|Beta|>0",
    xlab = "Output Feature",
    ylab = "Input Feature"
  )
  # Put the graphical parameters back to the values held in `op`.
  par(op)
}

Formulas
Network
Displaying the features associations
# Feature-association network derived from a bootstrapped ILAA run.
# Runs only when `dataframe` has more than 10 and fewer than 1000 columns.
# The 0/1 association matrix is pruned in up to three stages of increasing
# strictness, then drawn as a heatmap and as a directed graph when between
# 11 and 149 features remain.
par(op)
if ((ncol(dataframe) < 1000) && (ncol(dataframe) > 10)) {
  DEdataframeB <- ILAA(dataframe, verbose = TRUE, thr = thro, bootstrap = 30)
  # 1/0 indicator of the non-zero entries of the "UPLTM" attribute.
  transform <- 1 * (attr(DEdataframeB, "UPLTM") != 0)
  print(ncol(transform))

  # Stage 1: keep features whose column count exceeds `thrcol` or whose row
  # count exceeds 2, then require more than one link in both directions.
  # drop = FALSE keeps `transform` a matrix even if a single feature (or
  # none) survives, so the ncol() tests below never see NULL.
  thrcol <- 1 + 0.025 * nrow(transform)
  rsum <- rowSums(transform != 0) > 2
  csum <- colSums(transform != 0) > thrcol | rsum
  transform <- transform[csum, csum, drop = FALSE]
  csum <- (colSums(transform != 0) > 1) & (rowSums(transform != 0) > 1)
  transform <- transform[csum, csum, drop = FALSE]
  print(ncol(transform))

  # Stage 2: stricter thresholds, applied only if more than 100 remain.
  if (ncol(transform) > 100) {
    thrcol <- 1 + 0.10 * nrow(transform)
    rsum <- rowSums(transform != 0) > 4
    csum <- colSums(transform != 0) > thrcol | rsum
    transform <- transform[csum, csum, drop = FALSE]
    csum <- (colSums(transform != 0) > 3) & (rowSums(transform != 0) > 3)
    transform <- transform[csum, csum, drop = FALSE]
  }
  print(ncol(transform))

  # Stage 3: strictest thresholds, applied only if still more than 100.
  if (ncol(transform) > 100) {
    thrcol <- 1 + 0.20 * nrow(transform)
    rsum <- rowSums(transform != 0) > 8
    csum <- colSums(transform != 0) > thrcol | rsum
    transform <- transform[csum, csum, drop = FALSE]
    csum <- (colSums(transform != 0) > 7) & (rowSums(transform != 0) > 7)
    transform <- transform[csum, csum, drop = FALSE]
  }
  print(ncol(transform))

  if ((ncol(transform) > 10) && (ncol(transform) < 150)) {
    gplots::heatmap.2(
      transform,
      trace = "none",
      # Full argument name instead of the partially matched `mar`.
      margins = c(5, 5),
      col = rev(heat.colors(5)),
      main = "Red Decorrelation matrix",
      cexRow = cexheat,
      cexCol = cexheat,
      srtCol = 45,
      srtRow = 45,
      key.title = NA,
      key.xlab = "|Beta|>0",
      xlab = "Output Feature",
      ylab = "Input Feature"
    )
    par(op)

    # Strip the "La_" marker from the column names before they are used as
    # graph labels.
    colnames(transform) <- str_remove_all(colnames(transform), "La_")
    # Vertex size: column mean of the 0/1 matrix, rescaled so the largest
    # vertex has size 5.
    VertexSize <- colMeans(transform)
    VertexSize <- 5 * VertexSize / max(VertexSize)
    gr <- graph_from_adjacency_matrix(
      transform,
      mode = "directed",
      diag = FALSE,
      weighted = TRUE
    )
    gr$layout <- layout_with_fr
    fc <- cluster_optimal(gr)
    plot(
      fc, gr,
      edge.width = 0.5 * E(gr)$weight,
      vertex.size = VertexSize,
      edge.arrow.size = 0.5,
      edge.arrow.width = 0.5,
      vertex.label.cex = 0.65,
      vertex.label.dist = 1,
      main = "Feature Association"
    )
  }
}
#> fast | LM |
#> pregnant_x_pressure_x_age pregnant_x_pressure_x_insulin pregnant_x_pressure_x_pedigree insulin_x_mass_x_pedigree glucose_x_triceps_x_insulin pressure_x_insulin_x_age
#> pregnant glucose pressure triceps insulin mass
#> 0.83695652 0.03260870 0.01086957 0.13043478 0.54347826 0.04347826
#>
#> Included: 92 , Uni p: 0.001630435 , Base Size: 11 , Rcrit: 0.1482456
#>
#>
1 <R=0.986,thr=0.950>, Top: 15< 3 >[Fa= 15 ]( 15 , 39 , 0 ),<|><>Tot Used: 54 , Added: 39 , Zero Std: 0 , Max Cor: 0.965
#>
2 <R=0.965,thr=0.950>, Top: 2< 2 >[Fa= 17 ]( 2 , 4 , 15 ),<|><>Tot Used: 57 , Added: 4 , Zero Std: 0 , Max Cor: 0.950
#>
3 <R=0.950,thr=0.950>, Top: 1< 1 >[Fa= 18 ]( 1 , 1 , 17 ),<|><>Tot Used: 57 , Added: 1 , Zero Std: 0 , Max Cor: 0.949
#>
4 <R=0.949,thr=0.900>, Top: 22< 2 >[Fa= 29 ]( 22 , 32 , 18 ),<|><>Tot Used: 83 , Added: 32 , Zero Std: 0 , Max Cor: 0.949
#>
5 <R=0.949,thr=0.900>, Top: 5< 1 >[Fa= 34 ]( 5 , 5 , 29 ),<|><>Tot Used: 84 , Added: 5 , Zero Std: 0 , Max Cor: 0.900
#>
6 <R=0.900,thr=0.800>, Top: 20< 1 >[Fa= 40 ]( 16 , 24 , 34 ),<|><>Tot Used: 87 , Added: 24 , Zero Std: 0 , Max Cor: 0.858
#>
7 <R=0.858,thr=0.800>, Top: 2< 1 >[Fa= 41 ]( 2 , 2 , 40 ),<|><>Tot Used: 89 , Added: 2 , Zero Std: 0 , Max Cor: 0.851
#>
8 <R=0.851,thr=0.800>, Top: 2< 1 >[Fa= 41 ]( 1 , 1 , 41 ),<|><>Tot Used: 89 , Added: 1 , Zero Std: 0 , Max Cor: 0.797
#>
9 <R=0.797,thr=0.800>
#>
[ 9 ], 0.7969239 Decor Dimension: 89 Nused: 89 . Cor to Base: 62 , ABase: 92 , Outcome Base: 0
#>
bootstrapping->..............................
#>
[1] 89
#> [1] 71
#> [1] 71
#> [1] 71


# Set the graphical parameters back to the values held in `op`.
par(op)
Comparing ILAA vs PCA vs EFA
PCA
# PCA baseline: project the columns selected by `iscontinous` onto their
# centred, scaled principal components, append the remaining columns
# unchanged, and plot the absolute correlation between the resulting
# features (outcome excluded).
featuresnames <- colnames(dataframe)[colnames(dataframe) != outcome]
pc <- prcomp(dataframe[, iscontinous], center = TRUE, scale. = TRUE) # principal components
predPCA <- predict(pc, dataframe[, iscontinous])
PCAdataframe <- as.data.frame(cbind(predPCA, dataframe[, !iscontinous]))
# Names are set explicitly after cbind() from the two sources.
colnames(PCAdataframe) <- c(colnames(predPCA), colnames(dataframe)[!iscontinous])
#plot(PCAdataframe[,colnames(PCAdataframe)!=outcome],col=dataframe[,outcome],cex=0.65,cex.lab=0.5,cex.axis=0.75,cex.sub=0.5,cex.main=0.75)
#pander::pander(pc$rotation)
PCACor <- cor(PCAdataframe[, colnames(PCAdataframe) != outcome])
gplots::heatmap.2(
  abs(PCACor),
  trace = "none",
  # scale = "row",
  # Full argument name instead of the partially matched `mar`.
  margins = c(5, 5),
  col = rev(heat.colors(5)),
  main = "PCA Correlation",
  cexRow = 0.5,
  cexCol = 0.5,
  srtCol = 45,
  srtRow = -45,
  key.title = NA,
  key.xlab = "Pearson Correlation",
  xlab = "Feature",
  ylab = "Feature"
)

EFA
# EFA baseline: varimax-rotated factor scores replace the columns selected
# by `iscontinous`; the remaining columns are appended unchanged. When
# length(iscontinous) is 2000 or more the factor analysis is skipped and
# EFAdataframe stays equal to the scaled data.
EFAdataframe <- dataframeScaled
if (length(iscontinous) < 2000) {
  # Number of factors: half the number of PCA components, capped by the
  # sample and feature counts. floor() guarantees an integer factor count
  # when ncol(predPCA) is odd; at least 2 factors are always requested.
  topred <- floor(min(length(iscontinous), nrow(dataframeScaled), ncol(predPCA) / 2))
  topred <- max(topred, 2)
  uls <- fa(
    dataframeScaled[, iscontinous],
    nfactors = topred,
    rotate = "varimax",
    warnings = FALSE
  ) # EFA analysis
  predEFA <- predict(uls, dataframeScaled[, iscontinous])
  EFAdataframe <- as.data.frame(cbind(predEFA, dataframeScaled[, !iscontinous]))
  colnames(EFAdataframe) <- c(colnames(predEFA), colnames(dataframeScaled)[!iscontinous])
  EFACor <- cor(EFAdataframe[, colnames(EFAdataframe) != outcome])
  gplots::heatmap.2(
    abs(EFACor),
    trace = "none",
    # scale = "row",
    # Full argument name instead of the partially matched `mar`.
    margins = c(5, 5),
    col = rev(heat.colors(5)),
    main = "EFA Correlation",
    cexRow = 0.5,
    cexCol = 0.5,
    srtCol = 45,
    srtRow = -45,
    key.title = NA,
    key.xlab = "Pearson Correlation",
    xlab = "Feature",
    ylab = "Feature"
  )
}

Effect on CART modeling
# Depth-3 rpart tree fitted on the raw features and evaluated on the same
# data it was trained on.
par(op)
par(xpd = TRUE)
dataframe[, outcome] <- factor(dataframe[, outcome])
rawmodel <- rpart(
  paste(outcome, "~."),
  data = dataframe,
  control = rpart.control(maxdepth = 3)
)
pr <- predict(rawmodel, newdata = dataframe, type = "class")
# Placeholder summary, kept when the tree predicts only one class.
ptab <- list(er = "Error", detail = matrix(nrow = 6, ncol = 1))
if (length(unique(pr)) > 1) {
  plot(
    rawmodel,
    uniform = TRUE,
    branch = 0.5,
    compress = TRUE,
    margin = 0.1,
    main = "Raw"
  )
  text(rawmodel, use.n = TRUE, cex = 0.75)
  # 2x2 table of (prediction is 0) against (outcome is 0).
  ptab <- epiR::epi.tests(table(pr == 0, dataframe[, outcome] == 0))
}

# Confusion table, then rows 5, 3, 4 and 6 of the epi.tests detail table.
pander::pander(table(dataframe[, outcome], pr))
pander::pander(ptab$detail[c(5, 3, 4, 6), ])
| 5 |
diag.ac |
0.844 |
0.805 |
0.879 |
| 3 |
se |
0.777 |
0.696 |
0.845 |
| 4 |
sp |
0.878 |
0.832 |
0.915 |
| 6 |
diag.or |
25.032 |
14.380 |
43.574 |
# Depth-3 rpart tree fitted on the decorrelated features (DEdataframe) and
# evaluated on the same data it was trained on.
par(op)
par(xpd = TRUE)
DEdataframe[, outcome] <- factor(DEdataframe[, outcome])
IDeAmodel <- rpart(
  paste(outcome, "~."),
  data = DEdataframe,
  control = rpart.control(maxdepth = 3)
)
pr <- predict(IDeAmodel, newdata = DEdataframe, type = "class")
# Placeholder summary, kept when the tree predicts only one class.
ptab <- list(er = "Error", detail = matrix(nrow = 6, ncol = 1))
if (length(unique(pr)) > 1) {
  plot(
    IDeAmodel,
    uniform = TRUE,
    branch = 0.5,
    compress = TRUE,
    margin = 0.1,
    main = "ILAA"
  )
  text(IDeAmodel, use.n = TRUE, cex = 0.75)
  # 2x2 table of (prediction is 0) against (outcome is 0).
  ptab <- epiR::epi.tests(table(pr == 0, DEdataframe[, outcome] == 0))
}

# Confusion table, then rows 5, 3, 4 and 6 of the epi.tests detail table.
pander::pander(table(DEdataframe[, outcome], pr))
pander::pander(ptab$detail[c(5, 3, 4, 6), ])
| 5 |
diag.ac |
0.819 |
0.777 |
0.856 |
| 3 |
se |
0.754 |
0.671 |
0.825 |
| 4 |
sp |
0.851 |
0.802 |
0.892 |
| 6 |
diag.or |
17.511 |
10.365 |
29.584 |
# Depth-3 rpart tree fitted on the PCA-transformed features and evaluated on
# the same data it was trained on.
par(op)
par(xpd = TRUE)
PCAdataframe[, outcome] <- factor(PCAdataframe[, outcome])
PCAmodel <- rpart(
  paste(outcome, "~."),
  data = PCAdataframe,
  control = rpart.control(maxdepth = 3)
)
pr <- predict(PCAmodel, newdata = PCAdataframe, type = "class")
# Placeholder summary, kept when the tree predicts only one class.
ptab <- list(er = "Error", detail = matrix(nrow = 6, ncol = 1))
if (length(unique(pr)) > 1) {
  plot(
    PCAmodel,
    uniform = TRUE,
    branch = 0.5,
    compress = TRUE,
    margin = 0.1,
    main = "PCA"
  )
  text(PCAmodel, use.n = TRUE, cex = 0.75)
  # 2x2 table of (prediction is 0) against (outcome is 0).
  ptab <- epiR::epi.tests(table(pr == 0, PCAdataframe[, outcome] == 0))
}

# Confusion table, then rows 5, 3, 4 and 6 of the epi.tests detail table.
pander::pander(table(PCAdataframe[, outcome], pr))
pander::pander(ptab$detail[c(5, 3, 4, 6), ])
| 5 |
diag.ac |
0.824 |
0.783 |
0.860 |
| 3 |
se |
0.669 |
0.581 |
0.749 |
| 4 |
sp |
0.901 |
0.858 |
0.934 |
| 6 |
diag.or |
18.365 |
10.644 |
31.687 |
# Set the graphical parameters back to the values held in `op`.
par(op)
EFA
# Depth-3 rpart tree fitted on the EFA-transformed features and evaluated on
# the same data it was trained on.
EFAdataframe[, outcome] <- factor(EFAdataframe[, outcome])
EFAmodel <- rpart(
  paste(outcome, "~."),
  data = EFAdataframe,
  control = rpart.control(maxdepth = 3)
)
pr <- predict(EFAmodel, newdata = EFAdataframe, type = "class")
# Placeholder summary, kept when the tree predicts only one class.
ptab <- list(er = "Error", detail = matrix(nrow = 6, ncol = 1))
if (length(unique(pr)) > 1) {
  plot(
    EFAmodel,
    uniform = TRUE,
    branch = 0.5,
    compress = TRUE,
    margin = 0.1,
    main = "EFA"
  )
  text(EFAmodel, use.n = TRUE, cex = 0.75)
  # 2x2 table of (prediction is 0) against (outcome is 0).
  ptab <- epiR::epi.tests(table(pr == 0, EFAdataframe[, outcome] == 0))
}

# Confusion table, then rows 5, 3, 4 and 6 of the epi.tests detail table.
pander::pander(table(EFAdataframe[, outcome], pr))
pander::pander(ptab$detail[c(5, 3, 4, 6), ])
| 5 |
diag.ac |
0.801 |
0.758 |
0.839 |
| 3 |
se |
0.700 |
0.613 |
0.777 |
| 4 |
sp |
0.851 |
0.802 |
0.892 |
| 6 |
diag.or |
13.342 |
8.041 |
22.138 |
# Set the graphical parameters back to the values held in `op`.
par(op)